In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from itables import show
import os
import glob
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
tqdm.pandas()
files = sorted(glob.glob("data/classified/climate_tweets_*.csv"))
print(files)
pd.set_option('display.max_colwidth', -1)
['data/classified/climate_tweets_2006.csv', 'data/classified/climate_tweets_2007.csv', 'data/classified/climate_tweets_2008.csv', 'data/classified/climate_tweets_2009.csv', 'data/classified/climate_tweets_2010.csv', 'data/classified/climate_tweets_2011.csv', 'data/classified/climate_tweets_2012.csv', 'data/classified/climate_tweets_2013.csv', 'data/classified/climate_tweets_2014.csv', 'data/classified/climate_tweets_2015.csv', 'data/classified/climate_tweets_2016.csv', 'data/classified/climate_tweets_2017.csv', 'data/classified/climate_tweets_2018.csv', 'data/classified/climate_tweets_2019.csv', 'data/classified/climate_tweets_2020.csv']
<ipython-input-1-31bef66501b7>:11: FutureWarning: Passing a negative integer is deprecated in version 1.0 and will not be supported in future version. Instead, use None to not limit the column width.
  pd.set_option('display.max_colwidth', -1)
In [2]:
%%time
df = pd.concat(pd.read_csv(f, sep=";", engine="c", lineterminator="\n") for f in tqdm(files))
CPU times: user 13min 54s, sys: 1min 44s, total: 15min 38s
Wall time: 22min 15s
In [3]:
print(len(df))
32335076
In [51]:
df=df.drop_duplicates(subset="tweet_id")
print(len(df))
32261854
In [52]:
%%time
df.timestamp = pd.to_datetime(df.timestamp)
df.index = df.timestamp
CPU times: user 201 ms, sys: 96 ms, total: 297 ms
Wall time: 295 ms
/home/ubuntu/.local/lib/python3.8/site-packages/pandas/core/generic.py:5491: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value
In [53]:
daycounts = df.timestamp.resample("D").count()
daycounts[daycounts == 0].index
Out[53]:
DatetimeIndex(['2006-06-07', '2006-06-08', '2006-06-09', '2006-06-10',
               '2006-06-11', '2006-06-12', '2006-06-13', '2006-06-14',
               '2006-06-15', '2006-06-16',
               ...
               '2010-03-15', '2010-03-16', '2010-03-17', '2010-03-18',
               '2010-03-19', '2010-03-20', '2010-03-21', '2019-08-29',
               '2020-02-17', '2020-02-18'],
              dtype='datetime64[ns]', name='timestamp', length=230, freq=None)
In [54]:
df.columns
Out[54]:
Index(['username', 'fullname', 'user_id', 'tweet_id', 'tweet_url', 'timestamp',
       'timestamp_epochs', 'replies', 'retweets', 'likes', 'is_retweet',
       'retweeter_username', 'retweeter_userid', 'retweet_id', 'text', 'html',
       'text_with_emoji', 'neg', 'neu', 'pos', 'compound'],
      dtype='object')
In [55]:
df.timestamp.resample("M").count().median()
Out[55]:
158799.5
In [56]:
df.timestamp.resample("M").count().plot(figsize=(15,15), title="Climate twitter - number of tweets")
df.timestamp[df["compound"] > 0.05].resample("M").count().plot(color='g')
df.timestamp[df["compound"] < -0.05].resample("M").count().plot(color='r')
df.timestamp[df["compound"].between(-.05, .05)].resample("M").count().plot()
plt.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
plt.ylabel("# of tweets per month")
plt.show()
In [57]:
#et_by_month_pc = et_by_month.count() / by_month.count() * 100

all_tweets_monthly = df.timestamp.resample("M").count()
pos_tweets = df.timestamp[df["compound"] > 0.05].resample("M").count()
pos_tweets_pc = pos_tweets / all_tweets_monthly * 100
neg_tweets = df.timestamp[df["compound"] < -0.05].resample("M").count()
neg_tweets_pc = neg_tweets / all_tweets_monthly * 100
neutral_tweets = df.timestamp[df["compound"].between(-.05, .05)].resample("M").count()
neutral_tweets_pc = neutral_tweets / all_tweets_monthly * 100

pos_tweets_pc.plot(figsize=(15,15), title="Climate twitter - tweet sentiment as percentage of all climate tweets - by month",color="g")
neg_tweets_pc.plot(color='r')
neutral_tweets_pc.plot()
plt.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
plt.xlabel("year")
plt.ylabel("% of tweets per month")
plt.show()


#et_by_month_pc = et_by_month.count() / by_month.count() * 100
all_tweets_monthly = df.timestamp.resample("W").count()
pos_tweets = df.timestamp[df["compound"] > 0.05].resample("W").count()
pos_tweets_pc = pos_tweets / all_tweets_monthly * 100
neg_tweets = df.timestamp[df["compound"] < -0.05].resample("W").count()
neg_tweets_pc = neg_tweets / all_tweets_monthly * 100
neutral_tweets = df.timestamp[df["compound"].between(-.05, .05)].resample("W").count()
neutral_tweets_pc = neutral_tweets / all_tweets_monthly * 100

pos_tweets_pc.plot(figsize=(15,15), title="Climate twitter - tweet sentiment as percentage of all climate tweets - by week",color="g")
neg_tweets_pc.plot(color='r')
neutral_tweets_pc.plot()
plt.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
plt.xlabel("year")
plt.ylabel("% of tweets per month")
plt.show()
In [58]:
%%time
def display_filtered_df(df, pattern, n=5000, extracols = []):
    if len(df) < n:
        filtered_df = df
    else:
        filtered_df = df.sample(n)
    filtered_df["text_with_emoji_formatted"] = filtered_df.text_with_emoji.str.replace(pattern, lambda x: "<b>" + x.group() + "</b>", case=False)
    show(filtered_df[["text_with_emoji_formatted", "compound"] + extracols],
        order=[[2, 'desc']],
        orderClasses=False,
        createdRow="""function( row, data, dataIndex ) {
          if (data[2] > 0) {
            $(row).css('background-color', 'rgba(0,255,0,.2)');
          } else if (data[2] == 0) {
            $(row).css('background-color', 'rgba(0,0,255,.2)');
          } else {
            $(row).css('background-color', 'rgba(255,0,0,.2)');
          }
        }""", scrollY="800px", scrollCollapse=True, paging=False, columnDefs=[{"width": "120px", "targets": "_all"}])

pattern = r"global.?warming|climate.?chang|sea.?level.?ris|rising.?sea.?level|climate.?crisis|climate.?action|extreme.?weather|biodiversity|IPCC|Paris.?accord"
display_filtered_df(df, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version.
WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
text_with_emoji_formatted compound
timestamp
CPU times: user 2.63 s, sys: 96.1 ms, total: 2.73 s
Wall time: 2.71 s
In [59]:
df["compound"].resample("M").mean().plot(figsize=(20,20), title="Mean sentiment per month")
Out[59]:
<AxesSubplot:title={'center':'Mean sentiment per month'}, xlabel='timestamp'>
In [60]:
df.timestamp[df.text.str.contains(r"\bhot\b", na=False)].resample("M").count().plot(figsize=(15, 15), title="Global hot vs cold tweets", color="r")
df.timestamp[df.text.str.contains(r"\bcold\b", na=False)].resample("M").count().plot(color="b")
plt.ylabel("# of tweets per month")
plt.show()
In [61]:
print(df.columns)
display_filtered_df(df[df.likes > 2e5], pattern, 500, ["username", "likes"])
Index(['username', 'fullname', 'user_id', 'tweet_id', 'tweet_url', 'timestamp',
       'timestamp_epochs', 'replies', 'retweets', 'likes', 'is_retweet',
       'retweeter_username', 'retweeter_userid', 'retweet_id', 'text', 'html',
       'text_with_emoji', 'neg', 'neu', 'pos', 'compound'],
      dtype='object')
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version.
<timed exec>:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
text_with_emoji_formatted compound username likes
timestamp
In [62]:
display_filtered_df(df[df.username == "realDonaldTrump"], pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version.
<timed exec>:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
text_with_emoji_formatted compound
timestamp
In [63]:
%%time
pattern = r"fake|not real|isn'?t real|doesn'?t exist|hoax|propaganda|conspiracy"
deniers = df[df.text.str.contains(pattern, case=False, na=False)]
print(len(deniers))
display_filtered_df(deniers, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version.
WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
941086
text_with_emoji_formatted compound
timestamp
CPU times: user 10min 52s, sys: 261 ms, total: 10min 52s
Wall time: 10min 52s
In [64]:
deniers.timestamp.resample("M").count().plot(figsize=(15,15), title="Climate denial twitter - number of tweets")
plt.ylabel("# of tweets per month")
plt.show()
In [65]:
all_tweets_monthly = df.timestamp.resample("M").count()
denier_tweets = deniers.timestamp.resample("M").count()
denier_tweet_pc = denier_tweets / all_tweets_monthly * 100

denier_tweet_pc.plot(figsize=(15,15), title="Climate denier twitter - percentage of all climate tweets - by month",color="r")
plt.ylabel("% of tweets per month")
plt.show()
In [66]:
#%%time
# Save datasets of unique users and ids for bot scoring. Takes ~13min
#df[["username", "user_id"]].value_counts().to_csv("data/users.csv")
In [67]:
botscores = pd.read_csv("data/botscores.csv")
botscores
Out[67]:
user_id screen_name prob_bot n_tweets
0 731164510489313280 GCCThinkActTank 0.368905 63791
1 330509973 annemariayritys 0.007271 47821
2 176470818 Vandahmier 0.159065 41108
3 1079911297 NaN NaN 39807
4 2365497828 ZEROCO2_ 0.001946 31124
... ... ... ... ...
319325 456582197 SAPTelcoMedia 0.400230 13
319326 1154755772 Heavenhood 0.009159 13
319327 211486342 TNL4Lifestyle 0.841821 13
319328 23234151 blogshot 0.414494 13
319329 3329942253 gokid_carpool 0.203911 13

319330 rows × 4 columns

In [68]:
%%time
merged = pd.merge(df, botscores, on="user_id")
merged
CPU times: user 2min 51s, sys: 8.24 s, total: 2min 59s
Wall time: 2min 59s
Out[68]:
username fullname user_id tweet_id tweet_url timestamp timestamp_epochs replies retweets likes ... text html text_with_emoji neg neu pos compound screen_name prob_bot n_tweets
0 eversion Rob Annable 46673 1893063 /eversion/status/1893063 2006-12-31 10:47:25 1167562045 0 0 0 ... Warmer climate. Wider grain. Easier to cut. Thank you global warming. <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Warmer climate. Wider grain. Easier to cut. Thank you <strong>global warming</strong>.</p> Warmer climate. Wider grain. Easier to cut. Thank you global warming . 0.115 0.385 0.500 0.7184 eversion 0.004161 14
1 eversion Rob Annable 46673 826756380 /eversion/status/826756380 2008-06-04 11:43:26 1212579806 0 0 0 ... Failing to blog images from Birmingham's Climate Change festival because of broken iPhone camera <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Failing to blog images from Birmingham's <strong>Climate Change</strong> festival because of broken iPhone camera</p> Failing to blog images from Birmingham's Climate Change festival because of broken iPhone camera 0.311 0.534 0.155 -0.4939 eversion 0.004161 14
2 eversion Rob Annable 46673 812123666 /eversion/status/812123666 2008-05-15 17:36:29 1210872989 0 0 0 ... Wondering what I'm going to do with the stall I've been offered at the Birmingham Climate Change Festival <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Wondering what I'm going to do with the stall I've been offered at the Birmingham <strong>Climate Change</strong> Festival</p> Wondering what I'm going to do with the stall I've been offered at the Birmingham Climate Change Festival 0.086 0.762 0.152 0.3400 eversion 0.004161 14
3 eversion Rob Annable 46673 5534149643 /eversion/status/5534149643 2009-11-08 15:53:14 1257695594 0 0 0 ... Kids on climate change and low energy buildings: "Dad, is that why you've been working so much? Because you've got to save the earth?" <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Kids on <strong>climate change</strong> and low energy buildings: "Dad, is that why you've been working so much? Because you've got to save the earth?"</p> Kids on climate change and low energy buildings: "Dad, is that why you've been working so much? Because you've got to save the earth?" 0.073 0.730 0.197 0.5514 eversion 0.004161 14
4 eversion Rob Annable 46673 12450421385723904 /eversion/status/12450421385723904 2010-12-08 10:16:26 1291803386 0 0 0 ... at the #passivhaus trip debrief with Learning Skills for Climate Change folks. <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">at the <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/passivhaus?src=hash"><s>#</s><b>passivhaus</b></a> trip debrief with Learning Skills for <strong>Climate Change</strong> folks.</p> at the #passivhaus trip debrief with Learning Skills for Climate Change folks. 0.000 1.000 0.000 0.0000 eversion 0.004161 14
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
19699878 TheHipHopMayor Likwid Nomana 1206256844827697152 1213523146885779456 /TheHipHopMayor/status/1213523146885779456 2020-01-04 18:10:39 1578161439 0 0 0 ... We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/wXkllYlPSM <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/wXkllYlPSM">pic.twitter.com/wXkllYlPSM</a></p> We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/wXkllYlPSM 0.000 1.000 0.000 0.0000 TheHipHopMayor 0.002180 23
19699879 TheHipHopMayor Likwid Nomana 1206256844827697152 1213523113503285249 /TheHipHopMayor/status/1213523113503285249 2020-01-04 18:10:31 1578161431 0 0 0 ... We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/ltDetqUdWW <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/ltDetqUdWW">pic.twitter.com/ltDetqUdWW</a></p> We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/ltDetqUdWW 0.000 1.000 0.000 0.0000 TheHipHopMayor 0.002180 23
19699880 TheHipHopMayor Likwid Nomana 1206256844827697152 1213523088027062274 /TheHipHopMayor/status/1213523088027062274 2020-01-04 18:10:25 1578161425 0 0 0 ... We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/MfsPu9687W <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/MfsPu9687W">pic.twitter.com/MfsPu9687W</a></p> We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/MfsPu9687W 0.000 1.000 0.000 0.0000 TheHipHopMayor 0.002180 23
19699881 TheHipHopMayor Likwid Nomana 1206256844827697152 1213523070117404672 /TheHipHopMayor/status/1213523070117404672 2020-01-04 18:10:20 1578161420 0 0 0 ... We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZApic.twitter.com/ci341dGBoc <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a><a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/ci341dGBoc">pic.twitter.com/ci341dGBoc</a></p> We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA pic.twitter.com/ci341dGBoc 0.000 1.000 0.000 0.0000 TheHipHopMayor 0.002180 23
19699882 TheHipHopMayor Likwid Nomana 1206256844827697152 1213522649260929031 /TheHipHopMayor/status/1213522649260929031 2020-01-04 18:08:40 1578161320 0 0 0 ... We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="in">We had <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/20plentyHipHopSessions?src=hash"><s>#</s><b>20plentyHipHopSessions</b></a> Ko Tlhageng yesterday at Ventersdorp. <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/Blacktwitter?src=hash"><s>#</s><b>Blacktwitter</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/ClimateChange?src=hash"><s>#</s><b><strong>ClimateChange</strong></b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/IranWar?src=hash"><s>#</s><b>IranWar</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/hiphop?src=hash"><s>#</s><b>hiphop</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/music?src=hash"><s>#</s><b>music</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/beats?src=hash"><s>#</s><b>beats</b></a> <a class="twitter-hashtag pretty-link js-nav" data-query-source="hashtag_click" dir="ltr" href="/hashtag/RhymeFestZA?src=hash"><s>#</s><b>RhymeFestZA</b></a></p> We had #20plentyHipHopSessions Ko Tlhageng yesterday at Ventersdorp. #Blacktwitter #ClimateChange #IranWar #hiphop #music #beats #RhymeFestZA 0.000 1.000 0.000 0.0000 TheHipHopMayor 0.002180 23

19699883 rows × 24 columns

In [69]:
merged.index = merged.timestamp
bot_tweets = merged[merged.prob_bot > .5]
human_tweets = merged[merged.prob_bot < .5]
In [70]:
bot_tweets.timestamp.resample("M").count().plot(figsize=(15,15), title="Climate twitter - number of bot tweets")
bot_tweets.timestamp[bot_tweets["compound"] > 0.05].resample("M").count().plot(color='g')
bot_tweets.timestamp[bot_tweets["compound"] < -0.05].resample("M").count().plot(color='r')
bot_tweets.timestamp[bot_tweets["compound"].between(-.05, .05)].resample("M").count().plot()
plt.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
plt.ylabel("# of bot tweets per month")
plt.show()
In [71]:
all_tweets_monthly = bot_tweets.timestamp.resample("M").count()
pos_tweets = bot_tweets.timestamp[bot_tweets["compound"] > 0.05].resample("M").count()
pos_tweets_pc = pos_tweets / all_tweets_monthly * 100
neg_tweets = bot_tweets.timestamp[bot_tweets["compound"] < -0.05].resample("M").count()
neg_tweets_pc = neg_tweets / all_tweets_monthly * 100
neutral_tweets = bot_tweets.timestamp[bot_tweets["compound"].between(-.05, .05)].resample("M").count()
neutral_tweets_pc = neutral_tweets / all_tweets_monthly * 100

pos_tweets_pc.plot(figsize=(15,15), title="Climate twitter - tweet sentiment as percentage of all bot climate tweets - by month",color="g")
neg_tweets_pc.plot(color='r')
neutral_tweets_pc.plot()
plt.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
plt.xlabel("year")
plt.ylabel("% of bot tweets per month")
plt.show()
In [72]:
pattern = r"global.?warming|climate.?chang|sea.?level.?ris|rising.?sea.?level|climate.?crisis|climate.?action|extreme.?weather|biodiversity|IPCC|Paris.?accord"
display_filtered_df(bot_tweets, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version.
WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
text_with_emoji_formatted compound
timestamp
In [73]:
human_tweets.timestamp.resample("M").count().plot(figsize=(15,15), title="Climate twitter - number of non-bot tweets")
human_tweets.timestamp[human_tweets["compound"] > 0.05].resample("M").count().plot(color='g')
human_tweets.timestamp[human_tweets["compound"] < -0.05].resample("M").count().plot(color='r')
human_tweets.timestamp[human_tweets["compound"].between(-.05, .05)].resample("M").count().plot()
plt.legend(["All tweets", "Positive tweets", "Negative tweets", "Neutral tweets"])
plt.ylabel("# of non-bot tweets per month")
plt.show()
In [74]:
all_tweets_monthly = human_tweets.timestamp.resample("M").count()
pos_tweets = human_tweets.timestamp[human_tweets["compound"] > 0.05].resample("M").count()
pos_tweets_pc = pos_tweets / all_tweets_monthly * 100
neg_tweets = human_tweets.timestamp[human_tweets["compound"] < -0.05].resample("M").count()
neg_tweets_pc = neg_tweets / all_tweets_monthly * 100
neutral_tweets = human_tweets.timestamp[human_tweets["compound"].between(-.05, .05)].resample("M").count()
neutral_tweets_pc = neutral_tweets / all_tweets_monthly * 100

pos_tweets_pc.plot(figsize=(15,15), title="Climate twitter - tweet sentiment as percentage of all non-bot climate tweets - by month",color="g")
neg_tweets_pc.plot(color='r')
neutral_tweets_pc.plot()
plt.legend(["Positive tweets", "Negative tweets", "Neutral tweets"])
plt.xlabel("year")
plt.ylabel("% of non-bot tweets per month")
plt.show()
In [75]:
%%time
deniers_merged = pd.merge(deniers, botscores, on="user_id")
deniers_merged
CPU times: user 3.97 s, sys: 0 ns, total: 3.97 s
Wall time: 3.96 s
Out[75]:
username fullname user_id tweet_id tweet_url timestamp timestamp_epochs replies retweets likes ... text html text_with_emoji neg neu pos compound screen_name prob_bot n_tweets
0 jhayrocas Jay 926061 543666102 /jhayrocas/status/543666102 2007-12-29 13:00:26 1198933226 0 0 0 ... who said global warming is a hoax, it's december and i'm sweating in silang! <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">who said <strong>global warming</strong> is a hoax, it's december and i'm sweating in silang!</p> who said global warming is a hoax, it's december and i'm sweating in silang! 0.150 0.750 0.100 -0.2003 jhayrocas 0.000852 15
1 Rubenerd Ruben Schade 🔰 875971 520722232 /Rubenerd/status/520722232 2007-12-21 10:39:15 1198233555 0 0 0 ... @IntoYourHead I heard from Hank that Global Warming is a conspiracy cooked up by polar bears or something to stop artic exploration! <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en"><a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="49491971" dir="ltr" href="/intoyourhead"><s>@</s><b>IntoYourHead</b></a> I heard from Hank that <strong>Global Warming</strong> is a conspiracy cooked up by polar bears or something to stop artic exploration!</p> @IntoYourHead I heard from Hank that Global Warming is a conspiracy cooked up by polar bears or something to stop artic exploration! 0.215 0.657 0.128 -0.5255 Rubenerd 0.023427 35
2 Rubenerd Ruben Schade 🔰 875971 800304651 /Rubenerd/status/800304651 2008-04-30 12:16:12 1209557772 0 0 0 ... @ons I've almost finished my report on the "Oil companies discrediting global warming to save their business" conspiracy theory <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en"><a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="219275799" dir="ltr" href="/ONS"><s>@</s><b>ons</b></a> I've almost finished my report on the "Oil companies discrediting <strong>global warming</strong> to save their business" conspiracy theory</p> @ons I've almost finished my report on the "Oil companies discrediting global warming to save their business" conspiracy theory 0.140 0.661 0.198 0.1027 Rubenerd 0.023427 35
3 conservatweet conservatweet 9335232 490757572 /conservatweet/status/490757572 2007-12-11 17:41:10 1197394870 0 0 0 ... [Hot Air]: Max Mayfield destroys Democrats’ global warming conspiracy: Imperfect storm. http://tinyurl.com/2vlhas <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">[Hot Air]: Max Mayfield destroys Democrats’ <strong>global warming</strong> conspiracy: Imperfect storm. http://tinyurl.com/2vlhas</p> [Hot Air]: Max Mayfield destroys Democrats’ global warming conspiracy: Imperfect storm. http://tinyurl.com/2vlhas 0.492 0.423 0.085 -0.8271 NaN NaN 2265
4 conservatweet conservatweet 9335232 1082298586 /conservatweet/status/1082298586 2008-12-28 14:15:25 1230473725 1 0 0 ... [ACE] 2008 Was the Beginning of the End for the Global Warming Hoax: Back in July, I wrote "I suspect that .. http://tinyurl.com/8rgmpu <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">[ACE] 2008 Was the Beginning of the End for the <strong>Global Warming</strong> Hoax: Back in July, I wrote "I suspect that .. http://tinyurl.com/8rgmpu</p> [ACE] 2008 Was the Beginning of the End for the Global Warming Hoax: Back in July, I wrote "I suspect that .. http://tinyurl.com/8rgmpu 0.166 0.772 0.062 -0.4019 NaN NaN 2265
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
513148 MisterTimmr1 Old Timmr 4879535764 1212176671731634176 /MisterTimmr1/status/1212176671731634176 2020-01-01 01:00:14 1577840414 0 0 0 ... @LindseyGrahamSC also said climate change was real. He's a flake or fake. Take your pick. <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en"><a class="twitter-atreply pretty-link js-nav" data-mentioned-user-id="432895323" dir="ltr" href="/LindseyGrahamSC"><s>@</s><b>LindseyGrahamSC</b></a> also said <strong>climate change</strong> was real. He's a flake or fake. Take your pick.</p> @LindseyGrahamSC also said climate change was real. He's a flake or fake. Take your pick. 0.181 0.819 0.000 -0.4767 MisterTimmr1 0.001588 28
513149 right_populist TruthTeller 1209227323079372810 1212167901379207169 /right_populist/status/1212167901379207169 2020-01-01 00:25:23 1577838323 2 0 1 ... It’s funny because he probably just search up 'climate change is a hoax' and looked for an article and shared it with us. Haha <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">It’s funny because he probably just search up '<strong>climate change</strong> is a hoax' and looked for an article and shared it with us. Haha</p> It’s funny because he probably just search up ' climate change is a hoax' and looked for an article and shared it with us. Haha 0.067 0.669 0.264 0.7351 RightMinarchist 0.001528 14
513150 KarlHaymann Karlos 1205040259903287298 1212165790964649985 /KarlHaymann/status/1212165790964649985 2020-01-01 00:17:00 1577837820 0 0 0 ... The conspiracy theory is already doing the rounds amongst RWNJ that greens and climate change activists are starting them to blame Climate change, go figure. pic.twitter.com/YEFTEErKD1 <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">The conspiracy theory is already doing the rounds amongst RWNJ that greens and <strong>climate change</strong> activists are starting them to blame <strong>Climate change</strong>, go figure. <a class="twitter-timeline-link u-hidden" data-pre-embedded="true" dir="ltr" href="https://t.co/YEFTEErKD1">pic.twitter.com/YEFTEErKD1</a></p> The conspiracy theory is already doing the rounds amongst RWNJ that greens and climate change activists are starting them to blame Climate change , go figure. pic.twitter.com/YEFTEErKD1 0.188 0.812 0.000 -0.7003 NaN NaN 38
513151 TheRealGremlich Michael Nolen 3332482995 1212162390395101185 /TheRealGremlich/status/1212162390395101185 2020-01-01 00:03:29 1577837009 0 0 0 ... Look at this from 2004. Our country was being in danger back then, not from climate change, but hoaxers. It's not just unending wars, it's this making elitists and globalists money. Trump is dangerous because he found out.\n\n https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true … <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Look at this from 2004. Our country was being in danger back then, not from <strong>climate change</strong>, but hoaxers. It's not just unending wars, it's this making elitists and globalists money. Trump is dangerous because he found out.\n\n <a class="twitter-timeline-link" data-expanded-url="https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true" dir="ltr" href="https://t.co/CzmXGTG36x" rel="nofollow noopener" target="_blank" title="https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true"><span class="tco-ellipsis"></span><span class="invisible">https://</span><span class="js-display-url">amp.theguardian.com/environment/20</span><span class="invisible">04/feb/22/usnews.theobserver?__twitter_impression=true</span><span class="tco-ellipsis"><span class="invisible"> </span>…</span></a></p> Look at this from 2004. Our country was being in danger back then, not from climate change , but hoaxers. It's not just unending wars, it's this making elitists and globalists money. Trump is dangerous because he found out.\n\n https://amp.theguardian.com/environment/2004/feb/22/usnews.theobserver?__twitter_impression=true … 0.132 0.788 0.081 -0.3536 NaN NaN 28
513152 lafendad Lafendad 1206865510891237376 1212161581741748226 /lafendad/status/1212161581741748226 2020-01-01 00:00:16 1577836816 1 0 0 ... Or how about an international scientific journal that makes the connection between the 1980s Acid Rain scare and the current Climate Change hoax for you?\n\nhttps://principia-scientific.org/anatomy-of-an-environmental-scare/ … <p class="TweetTextSize js-tweet-text tweet-text" data-aria-label-part="0" lang="en">Or how about an international scientific journal that makes the connection between the 1980s Acid Rain scare and the current <strong>Climate Change</strong> hoax for you?\n\n<a class="twitter-timeline-link u-hidden" data-expanded-url="https://principia-scientific.org/anatomy-of-an-environmental-scare/" dir="ltr" href="https://t.co/xMACJIKqsa" rel="nofollow noopener" target="_blank" title="https://principia-scientific.org/anatomy-of-an-environmental-scare/"><span class="tco-ellipsis"></span><span class="invisible">https://</span><span class="js-display-url">principia-scientific.org/anatomy-of-an-</span><span class="invisible">environmental-scare/</span><span class="tco-ellipsis"><span class="invisible"> </span>…</span></a></p> Or how about an international scientific journal that makes the connection between the 1980s Acid Rain scare and the current Climate Change hoax for you?\n\n https://principia-scientific.org/anatomy-of-an-environmental-scare/ … 0.175 0.825 0.000 -0.6486 NaN NaN 15

513153 rows × 24 columns

In [76]:
deniers_merged.index = deniers_merged.timestamp
In [77]:
all_tweets_monthly = merged[~merged.prob_bot.isna()].timestamp.resample("M").count()
bot_denier_tweets = deniers_merged[deniers_merged.prob_bot > .5].timestamp.resample("M").count()
human_denier_tweets = deniers_merged[deniers_merged.prob_bot < .5].timestamp.resample("M").count()
bot_denier_tweet_pc = bot_denier_tweets / all_tweets_monthly * 100
human_denier_tweet_pc = human_denier_tweets / all_tweets_monthly * 100

bot_denier_tweet_pc.plot(figsize=(15,15), title="Climate denier twitter - percentage of all climate tweets - by month",color="r",label="Bots")
human_denier_tweet_pc.plot(color='b', label="Humans")
plt.legend(["Bots", "Humans"])
plt.ylabel("% of tweets per month")
plt.xlabel("year")
plt.show()
In [78]:
pattern = r"fake|not real|isn'?t real|doesn'?t exist|hoax|propaganda|conspiracy"
bot_deniers = deniers_merged[deniers_merged.prob_bot > .5]
display_filtered_df(bot_deniers, pattern)
<timed exec>:6: FutureWarning: The default value of regex will change from True to False in a future version.
WARNING:itables.downsample:showing 2500x2 of 5000x2 as nbytes=80000>65536=maxBytes. See https://mwouts.github.io/itables/#downsampling
text_with_emoji_formatted compound
timestamp